# Drop every row of `rest` that contains an NA in any column.
rest <- na.omit(rest)

# Number of remaining rows per zip code (column `Freq`), returned as a plain
# data.frame so it can be merged into the shapefile attributes later.
zip_vio <- rest %>%
  group_by(ZIPCODE) %>%
  summarize(Freq = n()) %>%
  ungroup() %>%
  as.data.frame()

# Mean of `price` per zip code in `final` (missing prices are ignored),
# returned as a plain data.frame with columns ZIPCODE and avg_price.
avg_price <- final %>%
  group_by(ZIPCODE) %>%
  summarize(avg_price = mean(price, na.rm = TRUE)) %>%
  ungroup() %>%
  as.data.frame()


# Attach the per-zip-code aggregates (`Freq`, `avg_price`) to `shp_joined`,
# matching rows on ZIPCODE.
# `FALSE` is written out in full: `F` is an ordinary variable that can be
# reassigned, whereas `FALSE` is a reserved constant.
# NOTE(review): sp's merge method appears to declare `all.x` rather than
# `all`; confirm that `all = FALSE` drops unmatched polygons as intended.
shp_joined <- sp::merge(shp_joined, zip_vio, by = "ZIPCODE", all = FALSE)
shp_joined <- sp::merge(shp_joined, avg_price, by = "ZIPCODE", all = FALSE)


# Build one HTML popup string per row of `shp_joined`. Every popup has the
# same layout: zip code, one metric line, then the PO_NAME location.
#   metric_label: HTML fragment that introduces the metric value
#   metric_value: vector of values to display, one per row of `shp_joined`
build_zip_popup <- function(metric_label, metric_value) {
  paste0(
    "<strong>Zip code </strong>",
    shp_joined$ZIPCODE,
    metric_label,
    metric_value,
    "<br><strong>Location </strong>",
    shp_joined$PO_NAME
  )
}

# Popup for the "Score" layer (score rounded down to a whole number).
zip_popup <- build_zip_popup(
  "<br><strong>Score </strong>",
  floor(shp_joined$score)
)

# Popup for the "Number of violations" layer.
zip_popup_1 <- build_zip_popup(
  "<br><strong>Number of violations </strong>",
  shp_joined$Freq
)

# Popup for the "Average price" layer (two decimal places).
zip_popup_2 <- build_zip_popup(
  "<br><strong>Average price level (/4) </strong>",
  round(shp_joined$avg_price, digits = 2)
)

# Interactive choropleth with one polygon layer per metric. The three layers
# are registered as base groups in the layer control, which is shown expanded.
leaflet(data = shp_joined) %>%
  addProviderTiles("CartoDB.Positron") %>%
  setView(lng = -74, lat = 40.7, zoom = 10) %>%
  # Layer 1: polygons filled by `score` through palette `pal`.
  addPolygons(
    group = "Score",
    fillColor = ~pal(score),
    fillOpacity = 0.8,
    color = "#BDBDC3",
    weight = 1,
    popup = zip_popup
  ) %>%
  # Layer 2: polygons filled by `Freq` through palette `pal1`.
  addPolygons(
    group = "Number of violations",
    fillColor = ~pal1(Freq),
    fillOpacity = 0.8,
    color = "Red",
    weight = 1,
    popup = zip_popup_1
  ) %>%
  # Layer 3: polygons filled by `avg_price` through palette `pal2`.
  addPolygons(
    group = "Average price",
    fillColor = ~pal2(avg_price),
    fillOpacity = 0.8,
    color = "Red",
    weight = 1,
    popup = zip_popup_2
  ) %>%
  addLayersControl(
    baseGroups = c("Score", "Number of violations", "Average price"),
    options = layersControlOptions(collapsed = FALSE)
  )

This is an interactive choropleth of New York City’s five boroughs at the zip code level. The zip codes in the first layer (Score) are colored by a metric we devised to represent the culinary diversity of the area (using our cleaned CUISINE DESCRIPTION column) combined with the Yelp ratings of its restaurants. The metric is based on the Herfindahl index, which is normally used to measure market concentration: as Wikipedia describes it, the index reflects the size of firms in relation to their industry and indicates the level of competition among them. The higher the score, the darker the zip code. A dark zip code therefore has high diversity and high-quality food - the place to be, you could say.

For more clarity, our formula is \(\sum_{i} \frac{w_i}{s_i^2}\), where \(w_i\) is the average rating of the restaurants serving cuisine \(i\) in a given zip code and \(s_i\) is the fraction of the zip code's restaurants that serve that cuisine. For example, if a zip code has 5 Italian restaurants with an average rating of 4 and 15 Greek restaurants with an average rating of 3.5, its score is \(\frac{4}{0.25^2} + \frac{3.5}{0.75^2} \approx 70.2\). The exponent controls how much you reward a zip code for having more diversity. We were worried that the metric was rewarding only diversity and nothing else. It turns out that even if you choose \(s_i^0\) (i.e. you essentially just count the cuisines in the zip code, each weighted by its average rating), the ordering of the top ~20 zip codes stays the same. Smaller exponents give more weight to the actual ratings, so in theory something like the square root gives a more balanced metric (which is what we ended up using).

The second layer shows the number of violations per zip code, and the third shows the average price level (on a scale of 1-4).